ret = dom0vp_add_physmap(d, arg0, arg1, (unsigned int)arg2,
(domid_t)arg3);
break;
+ case IA64_DOM0VP_expose_p2m:
+ ret = dom0vp_expose_p2m(d, arg0, arg1, arg2, arg3);
+ break;
default:
ret = -1;
- printf("unknown dom0_vp_op 0x%lx\n", cmd);
+ printk("unknown dom0_vp_op 0x%lx\n", cmd);
break;
}
}
break;
case PAL_HALT:
- if (current->domain == dom0) {
- printk ("Domain0 halts the machine\n");
- console_start_sync();
- (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL);
- }
- else
- domain_shutdown (current->domain,
- SHUTDOWN_poweroff);
- break;
+ if (current->domain == dom0) {
- printf ("Domain0 halts the machine\n");
++ printk ("Domain0 halts the machine\n");
+ console_start_sync();
+ (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL);
+ }
+ else
+ domain_shutdown(current->domain, SHUTDOWN_poweroff);
+ break;
+ case PAL_HALT_LIGHT:
+ if (VMX_DOMAIN(current)) {
+ /* Called by VTI. */
+ if (!is_unmasked_irq(current))
+ do_sched_op_compat(SCHEDOP_block, 0);
+ status = PAL_STATUS_SUCCESS;
+ }
+ break;
+ case PAL_PLATFORM_ADDR:
+ if (VMX_DOMAIN(current))
+ status = PAL_STATUS_SUCCESS;
+ break;
default:
printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %lu!!!!\n",
index);
perfc_incra(fw_hypercall, index >> 8);
switch (index) {
- case FW_HYPERCALL_PAL_CALL:
+ case FW_HYPERCALL_XEN:
+ return xen_hypercall(regs);
+
+ case FW_HYPERCALL_XEN_FAST:
+ return xen_fast_hypercall(regs);
+
+ case FW_HYPERCALL_PAL_CALL:
- //printf("*** PAL hypercall: index=%d\n",regs->r28);
+ //printk("*** PAL hypercall: index=%d\n",regs->r28);
//FIXME: This should call a C routine
#if 0
// This is very conservative, but avoids a possible
regs->r10 = fpswa_ret.err1;
regs->r11 = fpswa_ret.err2;
break;
- default:
+ default:
- printf("unknown ia64 fw hypercall %lx\n", regs->r2);
+ printk("unknown ia64 fw hypercall %lx\n", regs->r2);
regs->r8 = do_ni_hypercall();
}
return IA64_NO_FAULT;
}
#endif
- printf("%s: called with bad memory address: 0x%lx - iip=%lx\n",
+unsigned long
+xencomm_paddr_to_maddr(unsigned long paddr)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ u64 pa;
+
+ pa = ____lookup_domain_mpa(d, paddr);
+ if (pa == INVALID_MFN) {
++ printk("%s: called with bad memory address: 0x%lx - iip=%lx\n",
+ __func__, paddr, vcpu_regs(v)->cr_iip);
+ return 0;
+ }
+ return __va_ul((pa & _PFN_MASK) | (paddr & ~PAGE_MASK));
+}
+
/* Allocate a new page for domain and map it to the specified metaphysical
address. */
static struct page_info *
//return vcpu_force_data_miss(vcpu, regs->cr_iip);
return vcpu_force_inst_miss(vcpu, regs->cr_iip);
}
-
#if 0
- if (iip==0xa000000100001820) {
+ if (iip == 0xa000000100001820) {
static int firstpagefault = 1;
if (firstpagefault) {
- printf("*** First time to domain page fault!\n");
- printk("*** First time to domain page fault!\n"); firstpagefault=0;
++ printk("*** First time to domain page fault!\n");
+ firstpagefault = 0;
}
}
#endif
if (privop_trace) {
static long i = 400;
- //if (i > 0) printf("priv_handle_op: at 0x%lx\n",iip);
+ //if (i > 0) printk("priv_handle_op: at 0x%lx\n",iip);
- if (i > 0) printk("priv_handle_op: privop trace at 0x%lx, itc=%lx, itm=%lx\n",
- iip,ia64_get_itc(),ia64_get_itm());
+ if (i > 0)
- printf("priv_handle_op: privop trace at 0x%lx, "
++ printk("priv_handle_op: privop trace at 0x%lx, "
+ "itc=%lx, itm=%lx\n",
+ iip, ia64_get_itc(), ia64_get_itm());
i--;
}
slot = ((struct ia64_psr *)&ipsr)->ri;
- if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41;
+ if (!slot)
+ inst.inst = (bundle.i64[0] >> 5) & MASK_41;
else if (slot == 1)
- inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41;
- else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41;
- else printk("priv_handle_op: illegal slot: %d\n", slot);
+ inst.inst =
+ ((bundle.i64[0] >> 46) | bundle.i64[1] << 18) & MASK_41;
+ else if (slot == 2)
+ inst.inst = (bundle.i64[1] >> 23) & MASK_41;
+ else
- printf("priv_handle_op: illegal slot: %d\n", slot);
++ printk("priv_handle_op: illegal slot: %d\n", slot);
slot_type = slot_types[bundle.template][slot];
if (priv_verbose) {
- printf("priv_handle_op: checking bundle at 0x%lx "
- printk("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
- iip, (UINT64)inst.inst, slot, slot_type);
++ printk("priv_handle_op: checking bundle at 0x%lx "
+ "(op=0x%016lx) slot %d (type=%d)\n",
+ iip, (u64) inst.inst, slot, slot_type);
}
if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) {
// break instr for privified cover
}
if (inst.I27.x6 == 0x0a) {
perfc_incrc(mov_to_ar_imm);
- return priv_mov_to_ar_imm(vcpu,inst);
+ return priv_mov_to_ar_imm(vcpu, inst);
}
break;
- default:
+ default:
break;
}
- //printf("We who are about do die salute you\n");
- printf("priv_handle_op: can't handle privop at 0x%lx (op=0x%016lx) "
- //printk("We who are about do die salute you\n");
- printk("priv_handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=0x%lx\n",
- iip, (UINT64)inst.inst, slot, slot_type, ipsr);
- //printk("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
- //thread_mozambique("privop fault\n");
- return (IA64_ILLOP_FAULT);
++ //printk("We who are about do die salute you\n");
++ printk("priv_handle_op: can't handle privop at 0x%lx (op=0x%016lx) "
+ "slot %d (type=%d), ipsr=0x%lx\n",
+ iip, (u64) inst.inst, slot, slot_type, ipsr);
- //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
++ //printk("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
+ //thread_mozambique("privop fault\n");
+ return IA64_ILLOP_FAULT;
}
/** Emulate a privileged operation.
// handle privops masked as illops? and breaks (6)
if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) {
- printf("priv_emulate: isrcode != 0 or 1 or 2\n");
- printf("priv_emulate: returning ILLOP, not implemented!\n");
- printk("priv_emulate: isrcode != 0 or 1 or 2\n");
++ printk("priv_emulate: isrcode != 0 or 1 or 2\n");
+ printk("priv_emulate: returning ILLOP, not implemented!\n");
- while (1);
+ while (1) ;
return IA64_ILLOP_FAULT;
}
//if (isrcode != 1 && isrcode != 2) return 0;
(void)vcpu_increment_iip(vcpu);
}
if (fault == IA64_ILLOP_FAULT)
- printf("priv_emulate: priv_handle_op fails, "
+ printk("priv_emulate: priv_handle_op fails, "
- "isr=0x%lx iip=%lx\n",isr, regs->cr_iip);
+ "isr=0x%lx iip=%lx\n", isr, regs->cr_iip);
return fault;
}
--- /dev/null
- printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+/******************************************************************************
+ * tlb_track.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <asm/tlb_track.h>
+#include <asm/p2m_entry.h>
+#include <asm/vmx_mm_def.h> /* for IA64_RR_SHIFT */
+#include <asm/vmx_vcpu.h> /* for VRN7 */
+#include <asm/vcpu.h> /* for PSCB() */
+
+#define CONFIG_TLB_TRACK_DEBUG
+#ifdef CONFIG_TLB_TRACK_DEBUG
+# define tlb_track_printd(fmt, ...) \
++ printk("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+# define tlb_track_printd(fmt, ...) do { } while (0)
+#endif
+
+/*
+ * Grow the free pool by one domheap page worth of tlb_track_entry objects.
+ * Called with no free entries left (asserted below); the caller holds
+ * free_list_lock (see tlb_track_get_entry).
+ * Returns 0 on success, -ENOMEM when the entry limit is reached or the
+ * domheap allocation fails.
+ */
+static int
+tlb_track_allocate_entries(struct tlb_track* tlb_track)
+{
+    struct page_info* entry_page;
+    struct tlb_track_entry* track_entries;
+    unsigned int allocated;
+    unsigned long i;
+
+    /* Only grow when the pool is exhausted. */
+    BUG_ON(tlb_track->num_free > 0);
+    if (tlb_track->num_entries >= tlb_track->limit) {
+        DPRINTK("%s: num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+    entry_page = alloc_domheap_page(NULL);
+    if (entry_page == NULL) {
+        DPRINTK("%s: domheap page failed. num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+
+    /* Remember the page on page_list so tlb_track_destroy() can free it. */
+    list_add(&entry_page->list, &tlb_track->page_list);
+    track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
+    allocated = PAGE_SIZE / sizeof(track_entries[0]);
+    tlb_track->num_entries += allocated;
+    tlb_track->num_free += allocated;
+    /* Thread every carved-out entry onto the free list. */
+    for (i = 0; i < allocated; i++) {
+        list_add(&track_entries[i].list, &tlb_track->free_list);
+        // tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
+    }
+    tlb_track_printd("allocated %d num_entries %d num_free %d\n",
+                     allocated, tlb_track->num_entries, tlb_track->num_free);
+    return 0;
+}
+
+
+/*
+ * Allocate and initialize the per-domain TLB tracking state:
+ * the tlb_track descriptor, an initial batch of free entries, and a
+ * one-page hash table of list heads keyed by p2m pte pointer.
+ * Publishes the result in d->arch.tlb_track (after an smp_mb so other
+ * CPUs see a fully initialized structure).
+ * Returns 0 on success, -ENOMEM on any allocation failure.
+ */
+int
+tlb_track_create(struct domain* d)
+{
+    struct tlb_track* tlb_track = NULL;
+    struct page_info* hash_page = NULL;
+    unsigned int hash_size;
+    unsigned int hash_shift;
+    unsigned int i;
+
+    tlb_track = xmalloc(struct tlb_track);
+    if (tlb_track == NULL)
+        goto out;
+
+    hash_page = alloc_domheap_page(NULL);
+    if (hash_page == NULL)
+        goto out;
+
+    spin_lock_init(&tlb_track->free_list_lock);
+    INIT_LIST_HEAD(&tlb_track->free_list);
+    tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
+    tlb_track->num_entries = 0;
+    tlb_track->num_free = 0;
+    INIT_LIST_HEAD(&tlb_track->page_list);
+    if (tlb_track_allocate_entries(tlb_track) < 0)
+        goto out;
+
+    spin_lock_init(&tlb_track->hash_lock);
+    /* XXX hash size optimization */
+    hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
+    /* Round hash_size down to a power of two.  Note the loop stops when
+       1 << (hash_shift + 1) >= hash_size, so a power-of-two input ends up
+       halved. */
+    for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
+        /* nothing */;
+    tlb_track->hash_size = (1 << hash_shift);
+    tlb_track->hash_shift = hash_shift;
+    tlb_track->hash_mask = (1 << hash_shift) - 1;
+    tlb_track->hash = page_to_virt(hash_page);
+    for (i = 0; i < tlb_track->hash_size; i++)
+        INIT_LIST_HEAD(&tlb_track->hash[i]);
+
+    smp_mb(); /* make initialization visible before use. */
+    d->arch.tlb_track = tlb_track;
+    printk("%s:%d hash 0x%p hash_size %d \n",
+           __func__, __LINE__, tlb_track->hash, tlb_track->hash_size);
+
+    return 0;
+
+out:
+    /* Error path: free whatever was allocated.  Entry pages cannot have
+       been allocated here unless tlb_track_allocate_entries() succeeded,
+       in which case we do not reach this label. */
+    if (hash_page != NULL)
+        free_domheap_page(hash_page);
+
+    if (tlb_track != NULL)
+        xfree(tlb_track);
+
+    return -ENOMEM;
+}
+
+/*
+ * Tear down a domain's TLB tracking state: all entry pages, the hash
+ * page and the descriptor itself.  Every entry must already have been
+ * returned to the free list (asserted below), i.e. the domain is dead
+ * and no tracking is in flight.
+ */
+void
+tlb_track_destroy(struct domain* d)
+{
+    struct tlb_track* tlb_track = d->arch.tlb_track;
+    struct page_info* page;
+    struct page_info* next;
+
+    /* NOTE(review): free_list_lock is taken but never released here --
+       presumably harmless because the domain is being destroyed, but
+       confirm no later path spins on this lock. */
+    spin_lock(&tlb_track->free_list_lock);
+    BUG_ON(tlb_track->num_free != tlb_track->num_entries);
+
+    /* Free every page that tlb_track_allocate_entries() chained up. */
+    list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
+        list_del(&page->list);
+        free_domheap_page(page);
+    }
+
+    free_domheap_page(virt_to_page(tlb_track->hash));
+    xfree(tlb_track);
+    // d->tlb_track = NULL;
+}
+
+/*
+ * Pop one entry from the free pool, growing the pool on demand.
+ * Returns NULL when no entry can be allocated (limit reached or OOM);
+ * callers fall back to "track many" mode in that case.
+ */
+static struct tlb_track_entry*
+tlb_track_get_entry(struct tlb_track* tlb_track)
+{
+    struct tlb_track_entry* entry = NULL;
+    spin_lock(&tlb_track->free_list_lock);
+    if (tlb_track->num_free == 0)
+        (void)tlb_track_allocate_entries(tlb_track);
+
+    /* num_free > 0 either originally or after a successful refill. */
+    if (tlb_track->num_free > 0) {
+        BUG_ON(list_empty(&tlb_track->free_list));
+        entry = list_entry(tlb_track->free_list.next,
+                           struct tlb_track_entry, list);
+        tlb_track->num_free--;
+        list_del(&entry->list);
+    }
+    spin_unlock(&tlb_track->free_list_lock);
+    return entry;
+}
+
+/*
+ * Return an entry to the free pool.  The entry must already be unlinked
+ * from its hash chain.
+ */
+void
+tlb_track_free_entry(struct tlb_track* tlb_track,
+                     struct tlb_track_entry* entry)
+{
+    spin_lock(&tlb_track->free_list_lock);
+    list_add(&entry->list, &tlb_track->free_list);
+    tlb_track->num_free++;
+    spin_unlock(&tlb_track->free_list_lock);
+}
+
+
+#include <linux/hash.h>
+/* XXX hash function. */
+/*
+ * Map a p2m pte pointer to its hash chain.  The pte pointer (not the
+ * contents) is the key, hashed with the generic hash_long().
+ */
+static struct list_head*
+tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
+{
+    unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
+    BUG_ON(hash >= tlb_track->hash_size);
+    BUG_ON((hash & tlb_track->hash_mask) != hash);
+    return &tlb_track->hash[hash];
+}
+
+/*
+ * Detect whether a failed cmpxchg on a p2m entry was caused by another
+ * thread zapping/replacing the translation, as opposed to merely
+ * flipping the TLB-tracking state bits.  Compares the pfn and all
+ * non-pfn, non-tracking bits of the old and observed pte values.
+ * Returns 1 if the entry was zapped, 0 otherwise.
+ */
+static int
+tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
+{
+    if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
+        (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
+        (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
+        /* Other thread zapped the p2m entry. */
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Record that a guest TLB insertion occurred for the translation held in
+ * the p2m entry at ptep (observed value old_pte), for virtual address
+ * vaddr under region id rid; or, if the insertion is already tracked,
+ * just mark the entry dirty for the current vcpu/pcpu.
+ *
+ * Return values:
+ *   TLB_TRACK_FOUND - an existing entry matched; dirty masks updated.
+ *   TLB_TRACK_MANY  - multiple distinct insertions (or entry allocation
+ *                     failed); callers must use full-flush semantics.
+ *   TLB_TRACK_AGAIN - lost a race with a concurrent p2m update; the
+ *                     caller should retry via p2m_entry_set_retry().
+ *
+ * Locking: hash_lock is held across the lookup/insert; it is dropped
+ * and re-taken around entry allocation (the "goto again" path), so the
+ * chain is re-scanned after reacquisition.
+ */
+static TLB_TRACK_RET_T
+tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
+                          volatile pte_t* ptep, pte_t old_pte,
+                          unsigned long vaddr, unsigned long rid)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+    struct tlb_track_entry* new_entry = NULL;
+    unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
+    pte_t new_pte;
+    pte_t ret_pte;
+
+    struct vcpu* v = current;
+    TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;
+
+#if 0 /* this is done at vcpu_tlb_track_insert_or_dirty() */
+    perfc_incrc(tlb_track_iod);
+    if (!pte_tlb_tracking(old_pte)) {
+        perfc_incrc(tlb_track_iod_not_tracked);
+        return TLB_TRACK_NOT_TRACKED;
+    }
+#endif
+    /* Already in many-insert mode: nothing more to record. */
+    if (pte_tlb_inserted_many(old_pte)) {
+        perfc_incrc(tlb_track_iod_tracked_many);
+        return TLB_TRACK_MANY;
+    }
+
+    /* vaddr must be normalized so that it is in vrn7 and page aligned. */
+    BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7);
+    BUG_ON((vaddr & ~PAGE_MASK) != 0);
+#if 0
+    tlb_track_printd("\n"
+                     "\tmfn 0x%016lx\n"
+                     "\told_pte 0x%016lx ptep 0x%p\n"
+                     "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+                     "\ttlb_track 0x%p head 0x%p\n",
+                     mfn,
+                     pte_val(old_pte), ptep, pte_val(*ptep),
+                     vaddr, rid,
+                     tlb_track, head);
+#endif
+
+ again:
+    /*
+     * The zapping side may zap the p2m entry and then remove the tlb track
+     * entry non-atomically.  We may see the stale tlb track entry here.
+     * p2m_entry_retry() handles such a case.
+     * Or another thread may zap the p2m entry, remove the tlb track entry
+     * and insert a new tlb track entry.
+     */
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep)
+            continue;
+
+        if (pte_pfn(entry->pte_val) == mfn) {
+            // tlb_track_entry_printf(entry);
+            if (entry->vaddr == vaddr && entry->rid == rid) {
+                /* Exact match: same translation inserted again. */
+                // tlb_track_printd("TLB_TRACK_FOUND\n");
+                ret = TLB_TRACK_FOUND;
+                perfc_incrc(tlb_track_iod_found);
+#ifdef CONFIG_TLB_TRACK_CNT
+                entry->cnt++;
+                if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
+                    /*
+                     * heuristics:
+                     * If a page is used to transfer data by dev channel,
+                     * it would be unmapped with small amount access
+                     * (once or twice tlb insert) after real device
+                     * I/O completion. It would be short period.
+                     * However this page seems to be accessed many times.
+                     * We guess that this page is used I/O ring
+                     * so that tracking this entry might be useless.
+                     */
+                    // tlb_track_entry_printf(entry);
+                    // tlb_track_printd("cnt = %ld\n", entry->cnt);
+                    perfc_incrc(tlb_track_iod_force_many);
+                    goto force_many;
+                }
+#endif
+                goto found;
+            } else {
+                /* Same pte/mfn but a different vaddr/rid: promote the
+                   pte to many-insert mode and drop the single entry. */
+#ifdef CONFIG_TLB_TRACK_CNT
+            force_many:
+#endif
+                if (!pte_tlb_inserted(old_pte)) {
+                    printk("%s:%d racy update\n", __func__, __LINE__);
+                    old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
+                }
+                new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
+                ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+                if (pte_val(ret_pte) != pte_val(old_pte)) {
+                    // tlb_track_printd("TLB_TRACK_AGAIN\n");
+                    ret = TLB_TRACK_AGAIN;
+                    perfc_incrc(tlb_track_iod_again);
+                } else {
+                    // tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n",
+                    //                  entry);
+                    ret = TLB_TRACK_MANY;
+                    list_del(&entry->list);
+                    // tlb_track_entry_printf(entry);
+                    perfc_incrc(tlb_track_iod_tracked_many_del);
+                }
+                goto out;
+            }
+        }
+
+        /*
+         * Another thread changed the p2m entry and removed and inserted a
+         * new tlb track entry after we read old_pte, but before we took
+         * the spinlock.
+         */
+        // tlb_track_printd("TLB_TRACK_AGAIN\n");
+        ret = TLB_TRACK_AGAIN;
+        perfc_incrc(tlb_track_iod_again);
+        goto out;
+    }
+
+    /* No entry on the chain for this ptep. */
+    entry = NULL; // prevent freeing entry.
+    if (pte_tlb_inserted(old_pte)) {
+        /* Another thread removed the tlb_track_entry after we read old_pte
+           but before we took the spin lock. */
+        ret = TLB_TRACK_AGAIN;
+        perfc_incrc(tlb_track_iod_again);
+        goto out;
+    }
+    if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
+        /* First pass: allocate an entry outside the hash lock, then
+           re-scan from "again" since the world may have changed. */
+        spin_unlock(&tlb_track->hash_lock);
+        new_entry = tlb_track_get_entry(tlb_track);
+        if (new_entry == NULL) {
+            tlb_track_printd("get_entry failed\n");
+            /* entry can't be allocated.
+               fall down into full flush mode. */
+            bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
+            perfc_incrc(tlb_track_iod_new_failed);
+        }
+        // tlb_track_printd("new_entry 0x%p\n", new_entry);
+        perfc_incrc(tlb_track_iod_new_entry);
+        goto again;
+    }
+
+    /* Publish the tracking bits in the p2m entry itself. */
+    BUG_ON(pte_tlb_inserted_many(old_pte));
+    new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
+    ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+    if (pte_val(old_pte) != pte_val(ret_pte)) {
+        if (tlb_track_pte_zapped(old_pte, ret_pte)) {
+            // tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
+            ret = TLB_TRACK_AGAIN;
+            perfc_incrc(tlb_track_iod_again);
+            goto out;
+        }
+
+        /* Other thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY */
+        if (pte_tlb_inserted_many(ret_pte)) {
+            /* Other thread already set _PAGE_TLB_INSERTED_MANY and
+               removed the entry. */
+            // tlb_track_printd("inserted TLB_TRACK_MANY\n");
+            BUG_ON(!pte_tlb_inserted(ret_pte));
+            ret = TLB_TRACK_MANY;
+            perfc_incrc(tlb_track_iod_new_many);
+            goto out;
+        }
+        BUG_ON(pte_tlb_inserted(ret_pte));
+        BUG();
+    }
+    if (new_entry) {
+        /* cmpxchg succeeded: commit the freshly allocated entry. */
+        // tlb_track_printd("inserting new_entry 0x%p\n", new_entry);
+        entry = new_entry;
+        new_entry = NULL;
+
+        entry->ptep = ptep;
+        entry->pte_val = old_pte;
+        entry->vaddr = vaddr;
+        entry->rid = rid;
+        cpus_clear(entry->pcpu_dirty_mask);
+        vcpus_clear(entry->vcpu_dirty_mask);
+        list_add(&entry->list, head);
+
+#ifdef CONFIG_TLB_TRACK_CNT
+        entry->cnt = 0;
+#endif
+        perfc_incrc(tlb_track_iod_insert);
+        // tlb_track_entry_printf(entry);
+    } else {
+        /* Allocation had failed; we set INSERTED_MANY instead. */
+        goto out;
+    }
+
+ found:
+    /* Mark which physical cpu and vcpu dirtied this translation. */
+    BUG_ON(v->processor >= NR_CPUS);
+    cpu_set(v->processor, entry->pcpu_dirty_mask);
+    /* NOTE(review): vcpu_id is compared against NR_CPUS although
+       vcpu_dirty_mask is a vcpu mask -- presumably this should be
+       MAX_VIRT_CPUS; confirm. */
+    BUG_ON(v->vcpu_id >= NR_CPUS);
+    vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
+    perfc_incrc(tlb_track_iod_dirtied);
+
+ out:
+    spin_unlock(&tlb_track->hash_lock);
+    if (ret == TLB_TRACK_MANY && entry != NULL)
+        tlb_track_free_entry(tlb_track, entry);
+    if (new_entry != NULL)
+        tlb_track_free_entry(tlb_track, new_entry);
+    return ret;
+}
+
+/*
+ * Per-vcpu wrapper: look up the region id for vaddr, normalize vaddr
+ * into vrn7 page-aligned form (the canonical key used by the tracker),
+ * and record the insertion.  On a lost race, flag the p2m entry so the
+ * caller retries.
+ */
+void
+__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                                 struct p2m_entry* entry)
+{
+    unsigned long vrn = vaddr >> IA64_RR_SHIFT;
+    unsigned long rid = PSCB(vcpu, rrs[vrn]);
+    TLB_TRACK_RET_T ret;
+
+    /* normalize vrn7
+       When linux dom0 case, vrn7 is the most common case. */
+    vaddr |= VRN7 << VRN_SHIFT;
+    vaddr &= PAGE_MASK;
+    ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
+                                    &vcpu->domain->arch.mm,
+                                    entry->ptep, entry->used,
+                                    vaddr, rid);
+    if (ret == TLB_TRACK_AGAIN)
+        p2m_entry_set_retry(entry);
+}
+
+/*
+ * Zapping-side lookup: find and unlink the tracking entry for the p2m
+ * entry at ptep whose observed value is old_pte.  The tracking-state
+ * bits in old_pte decide the fast paths:
+ *   !tracking      -> TLB_TRACK_NOT_TRACKED
+ *   !inserted      -> TLB_TRACK_NOT_FOUND (nothing to flush)
+ *   inserted_many  -> TLB_TRACK_MANY (caller must full-flush)
+ * Otherwise exactly one matching entry must exist on the hash chain
+ * (BUG() enforces that invariant); it is unlinked and returned through
+ * *entryp with TLB_TRACK_FOUND.  The caller owns the entry and must
+ * eventually free it via tlb_track_free_entry().
+ */
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+                            volatile pte_t* ptep, pte_t old_pte,
+                            struct tlb_track_entry** entryp)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+
+    perfc_incrc(tlb_track_sar);
+    if (!pte_tlb_tracking(old_pte)) {
+        perfc_incrc(tlb_track_sar_not_tracked);
+        return TLB_TRACK_NOT_TRACKED;
+    }
+    if (!pte_tlb_inserted(old_pte)) {
+        BUG_ON(pte_tlb_inserted_many(old_pte));
+        perfc_incrc(tlb_track_sar_not_found);
+        return TLB_TRACK_NOT_FOUND;
+    }
+    if (pte_tlb_inserted_many(old_pte)) {
+        BUG_ON(!pte_tlb_inserted(old_pte));
+        perfc_incrc(tlb_track_sar_many);
+        return TLB_TRACK_MANY;
+    }
+
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep)
+            continue;
+
+        if (pte_pfn(entry->pte_val) == mfn) {
+            list_del(&entry->list);
+            spin_unlock(&tlb_track->hash_lock);
+            *entryp = entry;
+            perfc_incrc(tlb_track_sar_found);
+            // tlb_track_entry_printf(entry);
+#ifdef CONFIG_TLB_TRACK_CNT
+            // tlb_track_printd("cnt = %ld\n", entry->cnt);
+#endif
+            return TLB_TRACK_FOUND;
+        }
+        /* An entry for this ptep with a different mfn violates the
+           single-insert invariant. */
+        BUG();
+    }
+    /* _PAGE_TLB_INSERTED was set but no entry found: invariant broken.
+       The code below the BUG() is unreachable. */
+    BUG();
+    spin_unlock(&tlb_track->hash_lock);
+    return TLB_TRACK_NOT_TRACKED;
+}
+
+/* for debug */
+/* for debug */
+/*
+ * Dump one tracking entry, including textual renderings of its pcpu and
+ * vcpu dirty masks.  func/line identify the call site (normally supplied
+ * by a wrapper macro).
+ */
+void
+__tlb_track_entry_printf(const char* func, int line,
+                         const struct tlb_track_entry* entry)
+{
+    /* +1 for the NUL terminator from *_scnprintf. */
+    char pcpumask_buf[NR_CPUS + 1];
+    char vcpumask_buf[MAX_VIRT_CPUS + 1];
+    cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
+                      entry->pcpu_dirty_mask);
+    vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
+                      entry->vcpu_dirty_mask);
+    printk("%s:%d\n"
+           "\tmfn 0x%016lx\n"
+           "\told_pte 0x%016lx ptep 0x%p\n"
+           "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+           "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
+           "\tentry 0x%p\n",
+           func, line,
+           pte_pfn(entry->pte_val),
+           pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
+           entry->vaddr, entry->rid,
+           pcpumask_buf, vcpumask_buf,
+           entry);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
/* GCC 4.0.2 seems not to be able to suppress this call!. */
#define ia64_setreg_unknown_kr() return IA64_ILLOP_FAULT
-IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
+IA64FAULT vcpu_set_ar(VCPU * vcpu, u64 reg, u64 val)
{
- if (reg == 44) return (vcpu_set_itc(vcpu,val));
- else if (reg == 27) return (IA64_ILLOP_FAULT);
+ if (reg == 44)
+ return vcpu_set_itc(vcpu, val);
+ else if (reg == 27)
+ return IA64_ILLOP_FAULT;
else if (reg == 24)
- printf("warning: setting ar.eflg is a no-op; no IA-32 "
- printk("warning: setting ar.eflg is a no-op; no IA-32 support\n");
- else if (reg > 7) return (IA64_ILLOP_FAULT);
++ printk("warning: setting ar.eflg is a no-op; no IA-32 "
+ "support\n");
+ else if (reg > 7)
+ return IA64_ILLOP_FAULT;
else {
- PSCB(vcpu,krs[reg]) = val;
- ia64_set_kr(reg,val);
+ PSCB(vcpu, krs[reg]) = val;
+ ia64_set_kr(reg, val);
}
return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
+IA64FAULT vcpu_get_ar(VCPU * vcpu, u64 reg, u64 * val)
{
if (reg == 24)
- printf("warning: getting ar.eflg is a no-op; no IA-32 "
- printk("warning: getting ar.eflg is a no-op; no IA-32 support\n");
- else if (reg > 7) return (IA64_ILLOP_FAULT);
- else *val = PSCB(vcpu,krs[reg]);
++ printk("warning: getting ar.eflg is a no-op; no IA-32 "
+ "support\n");
+ else if (reg > 7)
+ return IA64_ILLOP_FAULT;
+ else
+ *val = PSCB(vcpu, krs[reg]);
return IA64_NO_FAULT;
}
if (imm.pp) {
ipsr->pp = 1;
psr.pp = 1;
- PSCB(vcpu,vpsr_pp) = 1;
+ PSCB(vcpu, vpsr_pp) = 1;
+ }
+ if (imm.sp) {
+ ipsr->sp = 1;
+ psr.sp = 1;
}
- if (imm.sp) { ipsr->sp = 1; psr.sp = 1; }
if (imm.i) {
if (vcpu->vcpu_info->evtchn_upcall_mask) {
- //printf("vcpu_set_psr_sm: psr.ic 0->1\n");
+ //printk("vcpu_set_psr_sm: psr.ic 0->1\n");
enabling_interrupts = 1;
}
vcpu->vcpu_info->evtchn_upcall_mask = 0;
}
- if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
+ if (imm.ic)
+ PSCB(vcpu, interrupt_collection_enabled) = 1;
// TODO: do this faster
- if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
- if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
- if (imm.ac) { ipsr->ac = 1; psr.ac = 1; }
- if (imm.up) { ipsr->up = 1; psr.up = 1; }
+ if (imm.mfl) {
+ ipsr->mfl = 1;
+ psr.mfl = 1;
+ }
+ if (imm.mfh) {
+ ipsr->mfh = 1;
+ psr.mfh = 1;
+ }
+ if (imm.ac) {
+ ipsr->ac = 1;
+ psr.ac = 1;
+ }
+ if (imm.up) {
+ ipsr->up = 1;
+ psr.up = 1;
+ }
if (imm.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ printk("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
- if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE);
- __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+ if (imm.dt)
+ vcpu_set_metaphysical_mode(vcpu, FALSE);
+ __asm__ __volatile(";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
if (enabling_interrupts &&
- vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
+ vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu, pending_interruption) = 1;
return IA64_NO_FAULT;
}
enabling_interrupts = 1;
vcpu->vcpu_info->evtchn_upcall_mask = 0;
}
- if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
- if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
- if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
- if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; }
- if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
- if (newpsr.dt && newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE);
- else vcpu_set_metaphysical_mode(vcpu,TRUE);
+ if (newpsr.ic)
+ PSCB(vcpu, interrupt_collection_enabled) = 1;
+ if (newpsr.mfl) {
+ ipsr->mfl = 1;
+ psr.mfl = 1;
+ }
+ if (newpsr.mfh) {
+ ipsr->mfh = 1;
+ psr.mfh = 1;
+ }
+ if (newpsr.ac) {
+ ipsr->ac = 1;
+ psr.ac = 1;
+ }
+ if (newpsr.up) {
+ ipsr->up = 1;
+ psr.up = 1;
+ }
+ if (newpsr.dt && newpsr.rt)
+ vcpu_set_metaphysical_mode(vcpu, FALSE);
+ else
+ vcpu_set_metaphysical_mode(vcpu, TRUE);
if (newpsr.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ printk("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
if (enabling_interrupts &&
- vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
+ vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu, pending_interruption) = 1;
return IA64_NO_FAULT;
}
return !vcpu->vcpu_info->evtchn_upcall_mask;
}
-UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
+u64 vcpu_get_ipsr_int_state(VCPU * vcpu, u64 prevpsr)
{
- UINT64 dcr = PSCBX(vcpu,dcr);
+ u64 dcr = PSCBX(vcpu, dcr);
PSR psr;
- //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...\n",prevpsr);
+ //printk("*** vcpu_get_ipsr_int_state (0x%016lx)...\n",prevpsr);
psr.i64 = prevpsr;
- psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
- psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1;
- psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled);
+ psr.ia64_psr.be = 0;
+ if (dcr & IA64_DCR_BE)
+ psr.ia64_psr.be = 1;
+ psr.ia64_psr.pp = 0;
+ if (dcr & IA64_DCR_PP)
+ psr.ia64_psr.pp = 1;
+ psr.ia64_psr.ic = PSCB(vcpu, interrupt_collection_enabled);
psr.ia64_psr.i = !vcpu->vcpu_info->evtchn_upcall_mask;
- psr.ia64_psr.bn = PSCB(vcpu,banknum);
- psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1;
- if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! fool domain
+ psr.ia64_psr.bn = PSCB(vcpu, banknum);
+ psr.ia64_psr.dt = 1;
+ psr.ia64_psr.it = 1;
+ psr.ia64_psr.rt = 1;
+ if (psr.ia64_psr.cpl == 2)
+ psr.ia64_psr.cpl = 0; // !!!! fool domain
// psr.pk = 1;
- //printf("returns 0x%016lx...\n",psr.i64);
+ //printk("returns 0x%016lx...\n",psr.i64);
return psr.i64;
}
ia64_rr rr;
rr.rrval = 0;
- rr.ps = vcpu_get_rr_ps(vcpu,ifa);
- rr.rid = vcpu_get_rr_rid(vcpu,ifa);
- return (rr.rrval);
+ rr.ps = vcpu_get_rr_ps(vcpu, ifa);
+ rr.rid = vcpu_get_rr_rid(vcpu, ifa);
+ return rr.rrval;
}
-
-IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_itir(VCPU * vcpu, u64 * pval)
{
- UINT64 val = PSCB(vcpu,itir);
+ u64 val = PSCB(vcpu, itir);
*pval = val;
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_iipa(VCPU * vcpu, u64 * pval)
{
- UINT64 val = PSCB(vcpu,iipa);
+ u64 val = PSCB(vcpu, iipa);
// SP entry code does not save iipa yet nor does it get
// properly delivered in the pscb
- // printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
+ // printk("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
*pval = val;
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_ifs(VCPU * vcpu, u64 * pval)
{
//PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs;
//*pval = PSCB(vcpu,regs).cr_ifs;
// a domain can differentiate whether it is running on SP or not
// Thus, writes of DCR should ignore the sign bit
//verbose("vcpu_set_dcr: called\n");
- PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
- return (IA64_NO_FAULT);
+ PSCBX(vcpu, dcr) = val & ~0x8000000000000000L;
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_iva(VCPU * vcpu, u64 val)
{
- if(VMX_DOMAIN(vcpu)){
- PSCB(vcpu,iva) = val & ~0x7fffL;
- }else{
- PSCBX(vcpu,iva) = val & ~0x7fffL;
- }
- return (IA64_NO_FAULT);
+ if (VMX_DOMAIN(vcpu))
+ PSCB(vcpu, iva) = val & ~0x7fffL;
+ else
+ PSCBX(vcpu, iva) = val & ~0x7fffL;
+
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_pta(VCPU * vcpu, u64 val)
{
if (val & IA64_PTA_LFMT) {
- printf("*** No support for VHPT long format yet!!\n");
+ printk("*** No support for VHPT long format yet!!\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
- if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT;
- if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu,pta) = val;
+ if (val & (0x3f << 9)) /* reserved fields */
+ return IA64_RSVDREG_FAULT;
+ if (val & 2) /* reserved fields */
+ return IA64_RSVDREG_FAULT;
+ PSCB(vcpu, pta) = val;
return IA64_NO_FAULT;
}
{
// SP entry code does not save iipa yet nor does it get
// properly delivered in the pscb
- // printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
+ // printk("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
- PSCB(vcpu,iipa) = val;
+ PSCB(vcpu, iipa) = val;
return IA64_NO_FAULT;
}
VCPU interrupt control register access routines
**************************************************************************/
-void vcpu_pend_unspecified_interrupt(VCPU *vcpu)
+void vcpu_pend_unspecified_interrupt(VCPU * vcpu)
{
- PSCB(vcpu,pending_interruption) = 1;
+ PSCB(vcpu, pending_interruption) = 1;
}
-void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+void vcpu_pend_interrupt(VCPU * vcpu, u64 vector)
{
if (vector & ~0xff) {
- printf("vcpu_pend_interrupt: bad vector\n");
+ printk("vcpu_pend_interrupt: bad vector\n");
return;
}
if (vcpu->arch.event_callback_ip) {
- printf("Deprecated interface. Move to new event based "
- printk("Deprecated interface. Move to new event based solution\n");
++ printk("Deprecated interface. Move to new event based "
+ "solution\n");
return;
}
-
- if ( VMX_DOMAIN(vcpu) ) {
- set_bit(vector,VCPU(vcpu,irr));
+
+ if (VMX_DOMAIN(vcpu)) {
+ set_bit(vector, VCPU(vcpu, irr));
} else {
- set_bit(vector,PSCBX(vcpu,irr));
- PSCB(vcpu,pending_interruption) = 1;
+ set_bit(vector, PSCBX(vcpu, irr));
+ PSCB(vcpu, pending_interruption) = 1;
}
}
}
// have a pending,deliverable interrupt... see if it is masked
bitnum = ia64_fls(bits);
- //printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...\n",bitnum);
+ //printk("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...\n",bitnum);
- vector = bitnum+(i*64);
+ vector = bitnum + (i * 64);
mask = 1L << bitnum;
/* sanity check for guest timer interrupt */
- if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ if (vector == (PSCB(vcpu, itv) & 0xff)) {
uint64_t now = ia64_get_itc();
- if (now < PSCBX(vcpu,domain_itm)) {
+ if (now < PSCBX(vcpu, domain_itm)) {
// printk("Ooops, pending guest timer before its due\n");
- PSCBX(vcpu,irr[i]) &= ~mask;
+ PSCBX(vcpu, irr[i]) &= ~mask;
goto check_start;
}
}
- //printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...\n",vector);
+ //printk("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...\n",vector);
if (*r >= mask) {
// masked by equal inservice
- //printf("but masked by equal inservice\n");
+ //printk("but masked by equal inservice\n");
return SPURIOUS_VECTOR;
}
- if (PSCB(vcpu,tpr) & IA64_TPR_MMI) {
+ if (PSCB(vcpu, tpr) & IA64_TPR_MMI) {
// tpr.mmi is set
- //printf("but masked by tpr.mmi\n");
+ //printk("but masked by tpr.mmi\n");
return SPURIOUS_VECTOR;
}
- if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) {
+ if (((PSCB(vcpu, tpr) & IA64_TPR_MIC) + 15) >= vector) {
//tpr.mic masks class
- //printf("but masked by tpr.mic\n");
+ //printk("but masked by tpr.mic\n");
return SPURIOUS_VECTOR;
}
- //printf("returned to caller\n");
-
+ //printk("returned to caller\n");
return vector;
}
return IA64_NO_FAULT;
}
#ifdef HEARTBEAT_FREQ
- if (domid >= N_DOMS) domid = N_DOMS-1;
+ if (domid >= N_DOMS)
+ domid = N_DOMS - 1;
#if 0
- if (vector == (PSCB(vcpu,itv) & 0xff)) {
- if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) {
- printk("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
- domid, count[domid], nonclockcount[domid]);
- //count[domid] = 0;
- //dump_runq();
- }
+ if (vector == (PSCB(vcpu, itv) & 0xff)) {
+ if (!(++count[domid] & ((HEARTBEAT_FREQ * 1024) - 1))) {
- printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
++ printk("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
+ domid, count[domid], nonclockcount[domid]);
+ //count[domid] = 0;
+ //dump_runq();
+ }
}
#endif
- else nonclockcount[domid]++;
+ else
+ nonclockcount[domid]++;
#endif
// now have an unmasked, pending, deliverable vector!
// getting ivr has "side effects"
#ifdef IRQ_DEBUG
if (firsttime[vector]) {
- printf("*** First get_ivr on vector=%lu,itc=%lx\n",
+ printk("*** First get_ivr on vector=%lu,itc=%lx\n",
- vector,ia64_get_itc());
- firsttime[vector]=0;
+ vector, ia64_get_itc());
+ firsttime[vector] = 0;
}
#endif
/* if delivering a timer interrupt, remember domain_itm, which
i = vector >> 6;
mask = 1L << (vector & 0x3f);
- //printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %lu\n",vector);
+ //printk("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %lu\n",vector);
- PSCBX(vcpu,insvc[i]) |= mask;
- PSCBX(vcpu,irr[i]) &= ~mask;
+ PSCBX(vcpu, insvc[i]) |= mask;
+ PSCBX(vcpu, irr[i]) &= ~mask;
//PSCB(vcpu,pending_interruption)--;
*pval = vector;
return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_tpr(VCPU * vcpu, u64 * pval)
{
- *pval = PSCB(vcpu,tpr);
- return (IA64_NO_FAULT);
+ *pval = PSCB(vcpu, tpr);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_eoi(VCPU * vcpu, u64 * pval)
{
- *pval = 0L; // reads of eoi always return 0
- return (IA64_NO_FAULT);
+ *pval = 0L; // reads of eoi always return 0
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_irr0(VCPU * vcpu, u64 * pval)
{
*pval = PSCBX(vcpu, irr[0]);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_irr1(VCPU * vcpu, u64 * pval)
{
*pval = PSCBX(vcpu, irr[1]);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_irr2(VCPU * vcpu, u64 * pval)
{
*pval = PSCBX(vcpu, irr[2]);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_irr3(VCPU * vcpu, u64 * pval)
{
*pval = PSCBX(vcpu, irr[3]);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_itv(VCPU * vcpu, u64 * pval)
{
- *pval = PSCB(vcpu,itv);
- return (IA64_NO_FAULT);
+ *pval = PSCB(vcpu, itv);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_pmv(VCPU * vcpu, u64 * pval)
{
- *pval = PSCB(vcpu,pmv);
- return (IA64_NO_FAULT);
+ *pval = PSCB(vcpu, pmv);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_cmcv(VCPU * vcpu, u64 * pval)
{
- *pval = PSCB(vcpu,cmcv);
- return (IA64_NO_FAULT);
+ *pval = PSCB(vcpu, cmcv);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_lrr0(VCPU * vcpu, u64 * pval)
{
// fix this when setting values other than m-bit is supported
- printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n");
+ printk("vcpu_get_lrr0: Unmasked interrupts unsupported\n");
*pval = (1L << 16);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_lrr1(VCPU * vcpu, u64 * pval)
{
// fix this when setting values other than m-bit is supported
- printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n");
+ printk("vcpu_get_lrr1: Unmasked interrupts unsupported\n");
*pval = (1L << 16);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_lid(VCPU * vcpu, u64 val)
{
- printf("vcpu_set_lid: Setting cr.lid is unsupported\n");
+ printk("vcpu_set_lid: Setting cr.lid is unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
-IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_tpr(VCPU * vcpu, u64 val)
{
- if (val & 0xff00) return IA64_RSVDREG_FAULT;
- PSCB(vcpu,tpr) = val;
+ if (val & 0xff00)
+ return IA64_RSVDREG_FAULT;
+ PSCB(vcpu, tpr) = val;
/* This can unmask interrupts. */
if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
- return (IA64_NO_FAULT);
+ PSCB(vcpu, pending_interruption) = 1;
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_eoi(VCPU * vcpu, u64 val)
{
- UINT64 *p, bits, vec, bitnum;
+ u64 *p, bits, vec, bitnum;
int i;
- p = &PSCBX(vcpu,insvc[3]);
- for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
+ p = &PSCBX(vcpu, insvc[3]);
+ for (i = 3; (i >= 0) && !(bits = *p); i--, p--)
+ ;
if (i < 0) {
- printf("Trying to EOI interrupt when none are in-service.\n");
+ printk("Trying to EOI interrupt when none are in-service.\n");
return IA64_NO_FAULT;
}
bitnum = ia64_fls(bits);
bits &= ~(1L << bitnum);
*p = bits;
/* clearing an eoi bit may unmask another pending interrupt... */
- if (!vcpu->vcpu_info->evtchn_upcall_mask) { // but only if enabled...
+ if (!vcpu->vcpu_info->evtchn_upcall_mask) { // but only if enabled...
// worry about this later... Linux only calls eoi
// with interrupts disabled
- printf("Trying to EOI interrupt with interrupts enabled\n");
+ printk("Trying to EOI interrupt with interrupts enabled\n");
}
if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
+ PSCB(vcpu, pending_interruption) = 1;
- //printf("YYYYY vcpu_set_eoi: Successful\n");
+ //printk("YYYYY vcpu_set_eoi: Successful\n");
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_lrr0(VCPU * vcpu, u64 val)
{
if (!(val & (1L << 16))) {
- printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
+ printk("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
// no place to save this state but nothing to do anyway
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_lrr1(VCPU * vcpu, u64 val)
{
if (!(val & (1L << 16))) {
- printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
++ printk("vcpu_set_lrr1: Unmasked interrupts unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
// no place to save this state but nothing to do anyway
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_itv(VCPU * vcpu, u64 val)
{
/* Check reserved fields. */
if (val & 0xef00)
local_irq_save(flags);
while (1) {
- //printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now);
+ //printk("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now);
ia64_set_itm(val);
- if (val > (now = ia64_get_itc())) break;
+ if (val > (now = ia64_get_itc()))
+ break;
val = now + epsilon;
epsilon <<= 1;
}
//UINT now = ia64_get_itc();
//if (val < now) val = now + 1000;
- //printf("*** vcpu_set_itm: called with %lx\n",val);
+ //printk("*** vcpu_set_itm: called with %lx\n",val);
- PSCBX(vcpu,domain_itm) = val;
+ PSCBX(vcpu, domain_itm) = val;
vcpu_set_next_timer(vcpu);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
+IA64FAULT vcpu_set_itc(VCPU * vcpu, u64 val)
{
#define DISALLOW_SETTING_ITC_FOR_NOW
#ifdef DISALLOW_SETTING_ITC_FOR_NOW
local_irq_disable();
if (olditm) {
- printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n", val,
-printk("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
- PSCBX(vcpu,domain_itm) = newnow + d;
++ printk("**** vcpu_set_itc(%lx): vitm changed to %lx\n", val,
+ newnow + d);
+ PSCBX(vcpu, domain_itm) = newnow + d;
}
local_cpu_data->itm_next = newnow + x;
- d = PSCBX(vcpu,domain_itm);
+ d = PSCBX(vcpu, domain_itm);
x = local_cpu_data->itm_next;
ia64_set_itc(newnow);
}
local_irq_enable();
#endif
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_itm(VCPU * vcpu, u64 * pval)
{
//FIXME: Implement this
- printf("vcpu_get_itm: Getting cr.itm is unsupported... continuing\n");
+ printk("vcpu_get_itm: Getting cr.itm is unsupported... continuing\n");
- return (IA64_NO_FAULT);
- //return (IA64_ILLOP_FAULT);
+ return IA64_NO_FAULT;
+ //return IA64_ILLOP_FAULT;
}
-IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval)
+IA64FAULT vcpu_get_itc(VCPU * vcpu, u64 * pval)
{
//TODO: Implement this
- printf("vcpu_get_itc: Getting ar.itc is unsupported\n");
+ printk("vcpu_get_itc: Getting ar.itc is unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
-void vcpu_pend_timer(VCPU *vcpu)
+void vcpu_pend_timer(VCPU * vcpu)
{
- UINT64 itv = PSCB(vcpu,itv) & 0xff;
+ u64 itv = PSCB(vcpu, itv) & 0xff;
- if (vcpu_timer_disabled(vcpu)) return;
+ if (vcpu_timer_disabled(vcpu))
+ return;
//if (vcpu_timer_inservice(vcpu)) return;
- if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
+ if (PSCBX(vcpu, domain_itm_last) == PSCBX(vcpu, domain_itm)) {
// already delivered an interrupt for this so
// don't deliver another
return;
psr.ia64_psr.bn = 1;
//psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken
if (psr.ia64_psr.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ printk("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
- PSCB(vcpu,incomplete_regframe) = 0; // is this necessary?
- ifs = PSCB(vcpu,ifs);
+ PSCB(vcpu, incomplete_regframe) = 0; // is this necessary?
+ ifs = PSCB(vcpu, ifs);
//if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
//if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
if (ifs & regs->cr_ifs & 0x8000000000000000L) {
//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr);
*pval = VHPT_addr;
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+IA64FAULT vcpu_ttag(VCPU * vcpu, u64 vadr, u64 * padr)
{
- printf("vcpu_ttag: ttag instruction unsupported\n");
+ printk("vcpu_ttag: ttag instruction unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
}
-int warn_region0_address = 0; // FIXME later: tie to a boot parameter?
+int warn_region0_address = 0; // FIXME later: tie to a boot parameter?
/* Return TRUE iff [b1,e1] and [b2,e2] partially or fully overlaps. */
-static inline int range_overlap (u64 b1, u64 e1, u64 b2, u64 e2)
+static inline int range_overlap(u64 b1, u64 e1, u64 b2, u64 e2)
{
return (b1 <= e2) && (e1 >= b2);
}
IA64FAULT fault;
fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
- if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB)
- {
+ if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
mask = itir_mask(itir);
*padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
- return vcpu_force_data_miss(vcpu,vadr);
+ return vcpu_force_data_miss(vcpu, vadr);
}
-IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr, u64 * key)
{
- printf("vcpu_tak: tak instruction unsupported\n");
+ printk("vcpu_tak: tak instruction unsupported\n");
- return (IA64_ILLOP_FAULT);
+ return IA64_ILLOP_FAULT;
// HACK ALERT: tak does a thash for now
//return vcpu_thash(vcpu,vadr,key);
}
// TODO: Should set Logical CPU state, not just physical
// NOTE: Writes to unimplemented PMC registers are discarded
#ifdef DEBUG_PFMON
- printf("vcpu_set_pmc(%x,%lx)\n", reg, val);
-printk("vcpu_set_pmc(%x,%lx)\n",reg,val);
++ printk("vcpu_set_pmc(%lx,%lx)\n", reg, val);
#endif
- ia64_set_pmc(reg,val);
- return (IA64_NO_FAULT);
+ ia64_set_pmc(reg, val);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val)
+IA64FAULT vcpu_set_pmd(VCPU * vcpu, u64 reg, u64 val)
{
// TODO: Should set Logical CPU state, not just physical
// NOTE: Writes to unimplemented PMD registers are discarded
#ifdef DEBUG_PFMON
- printf("vcpu_set_pmd(%x,%lx)\n", reg, val);
-printk("vcpu_set_pmd(%x,%lx)\n",reg,val);
++ printk("vcpu_set_pmd(%lx,%lx)\n", reg, val);
#endif
- ia64_set_pmd(reg,val);
- return (IA64_NO_FAULT);
+ ia64_set_pmd(reg, val);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+IA64FAULT vcpu_get_pmc(VCPU * vcpu, u64 reg, u64 * pval)
{
// NOTE: Reads from unimplemented PMC registers return zero
- UINT64 val = (UINT64)ia64_get_pmc(reg);
+ u64 val = (u64) ia64_get_pmc(reg);
#ifdef DEBUG_PFMON
- printf("%lx=vcpu_get_pmc(%x)\n", val, reg);
-printk("%lx=vcpu_get_pmc(%x)\n",val,reg);
++ printk("%lx=vcpu_get_pmc(%lx)\n", val, reg);
#endif
*pval = val;
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
-IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+IA64FAULT vcpu_get_pmd(VCPU * vcpu, u64 reg, u64 * pval)
{
// NOTE: Reads from unimplemented PMD registers return zero
- UINT64 val = (UINT64)ia64_get_pmd(reg);
+ u64 val = (u64) ia64_get_pmd(reg);
#ifdef DEBUG_PFMON
- printf("%lx=vcpu_get_pmd(%x)\n", val, reg);
-printk("%lx=vcpu_get_pmd(%x)\n",val,reg);
++ printk("%lx=vcpu_get_pmd(%lx)\n", val, reg);
#endif
*pval = val;
- return (IA64_NO_FAULT);
+ return IA64_NO_FAULT;
}
/**************************************************************************
vcpu_purge_tr_entry(&PSCBX(vcpu, dtlb));
- trp = &PSCBX(vcpu,dtrs[slot]);
+ trp = &PSCBX(vcpu, dtrs[slot]);
- //printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
+ //printk("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
- vcpu_set_tr_entry(trp,pte,itir,ifa);
- vcpu_quick_region_set(PSCBX(vcpu,dtr_regions),ifa);
+ vcpu_set_tr_entry(trp, pte, itir, ifa);
+ vcpu_quick_region_set(PSCBX(vcpu, dtr_regions), ifa);
/*
* FIXME According to spec, vhpt should be purged, but this
vcpu_purge_tr_entry(&PSCBX(vcpu, itlb));
- trp = &PSCBX(vcpu,itrs[slot]);
+ trp = &PSCBX(vcpu, itrs[slot]);
- //printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
+ //printk("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
- vcpu_set_tr_entry(trp,pte,itir,ifa);
- vcpu_quick_region_set(PSCBX(vcpu,itr_regions),ifa);
+ vcpu_set_tr_entry(trp, pte, itir, ifa);
+ vcpu_quick_region_set(PSCBX(vcpu, itr_regions), ifa);
/*
* FIXME According to spec, vhpt should be purged, but this
// FIXME: this is dangerous... vhpt_flush_address ensures these
// addresses never get flushed. More work needed if this
// ever happens.
- //printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
+ //printk("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
- if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps);
- else vhpt_insert(vaddr,pte,logps<<2);
+ if (logps > PAGE_SHIFT)
+ vhpt_multiple_insert(vaddr, pte, logps);
+ else
+ vhpt_insert(vaddr, pte, logps << 2);
}
// even if domain pagesize is larger than PAGE_SIZE, just put
// PAGE_SIZE mapping in the vhpt for now, else purging is complicated
// FIXME: validate not flushing Xen addresses
// if (Xen address) return(IA64_ILLOP_FAULT);
// FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
- //printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
+ //printk("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
- check_xen_space_overlap ("ptc_ga", vadr, addr_range);
+ check_xen_space_overlap("ptc_ga", vadr, addr_range);
- domain_flush_vtlb_range (vcpu->domain, vadr, addr_range);
+ domain_flush_vtlb_range(vcpu->domain, vadr, addr_range);
return IA64_NO_FAULT;
}
panic("vhpt_init: bad VHPT alignment!\n");
__get_cpu_var(vhpt_paddr) = paddr;
__get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
- printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
+ printk("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
paddr, __get_cpu_var(vhpt_pend));
- pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
- vhpt_map(pte);
- ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
- VHPT_ENABLED);
- vhpt_erase();
+ vhpt_erase(paddr);
+ // we don't enable VHPT here.
+ // context_switch() or schedule_tail() does it.
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+int
+pervcpu_vhpt_alloc(struct vcpu *v)
+{
+ unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+
+ v->arch.vhpt_entries =
+ (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
+ v->arch.vhpt_page =
+ alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
+ if (!v->arch.vhpt_page)
+ return -ENOMEM;
+
+ v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
+ if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
+ panic("pervcpu_vhpt_init: bad VHPT alignment!\n");
+
+ v->arch.pta.val = 0; // to zero reserved bits
+ v->arch.pta.ve = 1; // enable vhpt
+ v->arch.pta.size = VHPT_SIZE_LOG2;
+ v->arch.pta.vf = 1; // long format
+ v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;
+
+ vhpt_erase(v->arch.vhpt_maddr);
+ smp_mb(); // per vcpu vhpt may be used by another physical cpu.
+ return 0;
}
+void
+pervcpu_vhpt_free(struct vcpu *v)
+{
+ free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT);
+}
+#endif
+
+void
+domain_purge_swtc_entries(struct domain *d)
+{
+ struct vcpu* v;
+ for_each_vcpu(d, v) {
+ if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+ continue;
+
+ /* Purge TC entries.
+ FIXME: clear only if match. */
+ vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v,itlb));
+ }
+}
+
+void
+domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
+ vcpumask_t vcpu_dirty_mask)
+{
+ int vcpu;
+
+ for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
+ struct vcpu* v = d->vcpu[vcpu];
+ if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+ continue;
+
+ /* Purge TC entries.
+ FIXME: clear only if match. */
+ vcpu_purge_tr_entry(&PSCBX(v, dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v, itlb));
+ }
+}
+// SMP: we can't assume v == current, vcpu might move to another physical cpu.
+// So memory barrier is necessary.
+// if we can guranttee that vcpu can run on only this physical cpu
+// (e.g. vcpu == current), smp_mb() is unnecessary.
void vcpu_flush_vtlb_all(struct vcpu *v)
{
if (VMX_DOMAIN(v)) {
--- /dev/null
- printf("xencomm_copy_from_guest: from=%lx+%u n=%u\n",
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Tristan Gingold <tristan.gingold@bull.net>
+ */
+
+#include <xen/config.h>
+#include <xen/mm.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/guest_access.h>
+#include <public/xen.h>
+#include <public/xencomm.h>
+#include <xen/errno.h>
+
+#undef DEBUG
+#ifdef DEBUG
+static int xencomm_debug = 1; /* extremely verbose */
+#else
+#define xencomm_debug 0
+#endif
+
+static int
+xencomm_copy_chunk_from(
+ unsigned long to,
+ unsigned long paddr,
+ unsigned int len)
+{
+ unsigned long maddr;
+ struct page_info *page;
+
+ while (1) {
+ maddr = xencomm_paddr_to_maddr(paddr);
+ if (xencomm_debug > 1)
+ printk("%lx[%d] -> %lx\n", maddr, len, to);
+ if (maddr == 0)
+ return -EFAULT;
+
+ page = virt_to_page(maddr);
+ if (get_page(page, current->domain) == 0) {
+ if (page_get_owner(page) != current->domain) {
+ /* This page might be a page granted by another domain */
+ panic_domain(NULL, "copy_from_guest from foreign domain\n");
+ }
+ /* Try again. */
+ continue;
+ }
+ memcpy((void *)to, (void *)maddr, len);
+ put_page(page);
+ return 0;
+ }
+}
+
+/**
+ * xencomm_copy_from_guest: Copy a block of data from domain space.
+ * @to: Machine address.
+ * @from: Physical address to a xencomm buffer descriptor.
+ * @n: Number of bytes to copy.
+ * @skip: Number of bytes from the start to skip.
+ *
+ * Copy data from domain to hypervisor.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long
+xencomm_copy_from_guest(
+ void *to,
+ const void *from,
+ unsigned int n,
+ unsigned int skip)
+{
+ struct xencomm_desc *desc;
+ unsigned long desc_addr;
+ unsigned int from_pos = 0;
+ unsigned int to_pos = 0;
+ unsigned int i = 0;
+
+ if (xencomm_debug)
- printf ("src_paddr=%lx i=%d, skip=%d\n",
++ printk("xencomm_copy_from_guest: from=%lx+%u n=%u\n",
+ (unsigned long)from, skip, n);
+
+ if (XENCOMM_IS_INLINE(from)) {
+ unsigned long src_paddr = XENCOMM_INLINE_ADDR(from);
+
+ src_paddr += skip;
+
+ while (n > 0) {
+ unsigned int chunksz;
+ unsigned int bytes;
+ int res;
+
+ chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE);
+
+ bytes = min(chunksz, n);
+
+ res = xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes);
+ if (res != 0)
+ return -EFAULT;
+ src_paddr += bytes;
+ to += bytes;
+ n -= bytes;
+ }
+
+ /* Always successful. */
+ return 0;
+ }
+
+ /* first we need to access the descriptor */
+ desc_addr = xencomm_paddr_to_maddr((unsigned long)from);
+ if (desc_addr == 0)
+ return -EFAULT;
+
+ desc = (struct xencomm_desc *)desc_addr;
+ if (desc->magic != XENCOMM_MAGIC) {
+ printk("%s: error: %p magic was 0x%x\n",
+ __func__, desc, desc->magic);
+ return -EFAULT;
+ }
+
+ /* iterate through the descriptor, copying up to a page at a time */
+ while ((to_pos < n) && (i < desc->nr_addrs)) {
+ unsigned long src_paddr = desc->address[i];
+ unsigned int pgoffset;
+ unsigned int chunksz;
+ unsigned int chunk_skip;
+
+ if (src_paddr == XENCOMM_INVALID) {
+ i++;
+ continue;
+ }
+
+ pgoffset = src_paddr % PAGE_SIZE;
+ chunksz = PAGE_SIZE - pgoffset;
+
+ chunk_skip = min(chunksz, skip);
+ from_pos += chunk_skip;
+ chunksz -= chunk_skip;
+ skip -= chunk_skip;
+
+ if (skip == 0) {
+ unsigned int bytes = min(chunksz, n - to_pos);
+ int res;
+
+ if (xencomm_debug > 1)
- printf ("xencomm_copy_to_guest: to=%lx+%u n=%u\n",
++ printk("src_paddr=%lx i=%u, skip=%u\n",
+ src_paddr, i, chunk_skip);
+
+ res = xencomm_copy_chunk_from((unsigned long)to + to_pos,
+ src_paddr + chunk_skip, bytes);
+ if (res != 0)
+ return -EFAULT;
+
+ from_pos += bytes;
+ to_pos += bytes;
+ }
+
+ i++;
+ }
+
+ return n - to_pos;
+}
+
+static int
+xencomm_copy_chunk_to(
+ unsigned long paddr,
+ unsigned long from,
+ unsigned int len)
+{
+ unsigned long maddr;
+ struct page_info *page;
+
+ while (1) {
+ maddr = xencomm_paddr_to_maddr(paddr);
+ if (xencomm_debug > 1)
+ printk("%lx[%d] -> %lx\n", from, len, maddr);
+ if (maddr == 0)
+ return -EFAULT;
+
+ page = virt_to_page(maddr);
+ if (get_page(page, current->domain) == 0) {
+ if (page_get_owner(page) != current->domain) {
+ /* This page might be a page granted by another domain */
+ panic_domain(NULL, "copy_to_guest to foreign domain\n");
+ }
+ /* Try again. */
+ continue;
+ }
+ memcpy((void *)maddr, (void *)from, len);
+ put_page(page);
+ return 0;
+ }
+}
+
+/**
+ * xencomm_copy_to_guest: Copy a block of data to domain space.
+ * @to: Physical address to xencomm buffer descriptor.
+ * @from: Machine address.
+ * @n: Number of bytes to copy.
+ * @skip: Number of bytes from the start to skip.
+ *
+ * Copy data from hypervisor to domain.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long
+xencomm_copy_to_guest(
+ void *to,
+ const void *from,
+ unsigned int n,
+ unsigned int skip)
+{
+ struct xencomm_desc *desc;
+ unsigned long desc_addr;
+ unsigned int from_pos = 0;
+ unsigned int to_pos = 0;
+ unsigned int i = 0;
+
+ if (xencomm_debug)
++ printk("xencomm_copy_to_guest: to=%lx+%u n=%u\n",
+ (unsigned long)to, skip, n);
+
+ if (XENCOMM_IS_INLINE(to)) {
+ unsigned long dest_paddr = XENCOMM_INLINE_ADDR(to);
+
+ dest_paddr += skip;
+
+ while (n > 0) {
+ unsigned int chunksz;
+ unsigned int bytes;
+ int res;
+
+ chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE);
+
+ bytes = min(chunksz, n);
+
+ res = xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes);
+ if (res != 0)
+ return res;
+
+ dest_paddr += bytes;
+ from += bytes;
+ n -= bytes;
+ }
+
+ /* Always successful. */
+ return 0;
+ }
+
+ /* first we need to access the descriptor */
+ desc_addr = xencomm_paddr_to_maddr((unsigned long)to);
+ if (desc_addr == 0)
+ return -EFAULT;
+
+ desc = (struct xencomm_desc *)desc_addr;
+ if (desc->magic != XENCOMM_MAGIC) {
+ printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
+ return -EFAULT;
+ }
+
+ /* iterate through the descriptor, copying up to a page at a time */
+ while ((from_pos < n) && (i < desc->nr_addrs)) {
+ unsigned long dest_paddr = desc->address[i];
+ unsigned int pgoffset;
+ unsigned int chunksz;
+ unsigned int chunk_skip;
+
+ if (dest_paddr == XENCOMM_INVALID) {
+ i++;
+ continue;
+ }
+
+ pgoffset = dest_paddr % PAGE_SIZE;
+ chunksz = PAGE_SIZE - pgoffset;
+
+ chunk_skip = min(chunksz, skip);
+ to_pos += chunk_skip;
+ chunksz -= chunk_skip;
+ skip -= chunk_skip;
+ dest_paddr += chunk_skip;
+
+ if (skip == 0) {
+ unsigned int bytes = min(chunksz, n - from_pos);
+ int res;
+
+ res = xencomm_copy_chunk_to(dest_paddr,
+ (unsigned long)from + from_pos, bytes);
+ if (res != 0)
+ return res;
+
+ from_pos += bytes;
+ to_pos += bytes;
+ }
+
+ i++;
+ }
+ return n - from_pos;
+}
+
+/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely
+ * exhausted pages to XENCOMM_INVALID. */
+void *
+xencomm_add_offset(
+ void *handle,
+ unsigned int bytes)
+{
+ struct xencomm_desc *desc;
+ unsigned long desc_addr;
+ int i = 0;
+
+ if (XENCOMM_IS_INLINE(handle))
+ return (void *)((unsigned long)handle + bytes);
+
+ /* first we need to access the descriptor */
+ desc_addr = xencomm_paddr_to_maddr((unsigned long)handle);
+ if (desc_addr == 0)
+ return NULL;
+
+ desc = (struct xencomm_desc *)desc_addr;
+ if (desc->magic != XENCOMM_MAGIC) {
+ printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic);
+ return NULL;
+ }
+
+ /* iterate through the descriptor incrementing addresses */
+ while ((bytes > 0) && (i < desc->nr_addrs)) {
+ unsigned long dest_paddr = desc->address[i];
+ unsigned int pgoffset;
+ unsigned int chunksz;
+ unsigned int chunk_skip;
+
+ pgoffset = dest_paddr % PAGE_SIZE;
+ chunksz = PAGE_SIZE - pgoffset;
+
+ chunk_skip = min(chunksz, bytes);
+ if (chunk_skip == chunksz) {
+ /* exhausted this page */
+ desc->address[i] = XENCOMM_INVALID;
+ } else {
+ desc->address[i] += chunk_skip;
+ }
+ bytes -= chunk_skip;
+ }
+ return handle;
+}
+
+int
+xencomm_handle_is_null(
+ void *ptr)
+{
+ if (XENCOMM_IS_INLINE(ptr))
+ return XENCOMM_INLINE_ADDR(ptr) == 0;
+ else {
+ struct xencomm_desc *desc;
+ unsigned long desc_addr;
+
+ desc_addr = xencomm_paddr_to_maddr((unsigned long)ptr);
+ if (desc_addr == 0)
+ return 1;
+
+ desc = (struct xencomm_desc *)desc_addr;
+ return (desc->address[0] == XENCOMM_INVALID);
+ }
+}
/* first find highest page frame number */
max_page = 0;
efi_memmap_walk(find_max_pfn, &max_page);
- printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
+ printk("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
efi_print();
- heap_start = memguard_init(ia64_imva(&_end));
- printk("Before heap_start: %p\n", heap_start);
- heap_start = __va(init_boot_allocator(__pa(heap_start)));
- printk("After heap_start: %p\n", heap_start);
+ xen_heap_start = memguard_init(ia64_imva(&_end));
- printf("Before xen_heap_start: %p\n", xen_heap_start);
++ printk("Before xen_heap_start: %p\n", xen_heap_start);
+ xen_heap_start = __va(init_boot_allocator(__pa(xen_heap_start)));
- printf("After xen_heap_start: %p\n", xen_heap_start);
++ printk("After xen_heap_start: %p\n", xen_heap_start);
efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
efi_memmap_walk(xen_count_pages, &nr_pages);
u64 pte, u64 itir, u64 ifa, u64 rid);
/* Initialize vcpu regs. */
-extern void vcpu_init_regs (struct vcpu *v);
+extern void vcpu_init_regs(struct vcpu *v);
-static inline UINT64
-itir_ps(UINT64 itir)
+static inline u64 itir_ps(u64 itir)
{
- return ((itir >> 2) & 0x3f);
+ return ((itir >> 2) & 0x3f);
}
-static inline UINT64
-itir_mask(UINT64 itir)
+static inline u64 itir_mask(u64 itir)
{
- return (~((1UL << itir_ps(itir)) - 1));
+ return (~((1UL << itir_ps(itir)) - 1));
}
-static inline s64
-vcpu_get_next_timer_ns(VCPU *vcpu)
+static inline s64 vcpu_get_next_timer_ns(VCPU * vcpu)
{
- s64 vcpu_get_next_timer_ns;
- u64 d = PSCBX(vcpu, domain_itm);
- u64 now = ia64_get_itc();
+ s64 vcpu_get_next_timer_ns;
+ u64 d = PSCBX(vcpu, domain_itm);
+ u64 now = ia64_get_itc();
- if (d > now)
- vcpu_get_next_timer_ns = cycle_to_ns(d - now) + NOW();
- else
- vcpu_get_next_timer_ns = cycle_to_ns(local_cpu_data->itm_delta) + NOW();
+ if (d > now)
+ vcpu_get_next_timer_ns = cycle_to_ns(d - now) + NOW();
+ else
+ vcpu_get_next_timer_ns =
+ cycle_to_ns(local_cpu_data->itm_delta) + NOW();
- return vcpu_get_next_timer_ns;
+ return vcpu_get_next_timer_ns;
}
- #define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
+ #define verbose(a...) do {if (vcpu_verbose) printk(a);} while(0)
//#define vcpu_quick_region_check(_tr_regions,_ifa) 1
#define vcpu_quick_region_check(_tr_regions,_ifa) \